#!/usr/bin/env python

'''
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

# NOTE: This script is executed by Python 2.4 on CentOS 5.
# Make sure your changes are compatible.

import cgi
import glob
import os
import re
import rrdtool
import sys
import time
import urlparse

# place this script in /var/www/cgi-bin of the Ganglia collector
# requires 'yum install rrdtool-python' on the Ganglia collector
'''
  Loads rrd file info
'''
def loadRRDData(file, cf, start, end, resolution):
  '''
    Fetches the content of one rrd file through the rrdcached daemon socket.

    file        path of the rrd file
    cf          passed to rrdtool fetch right after the file name
    start       value for "-s"; "now-10m" is used when None
    end         value for "-e"; the option is omitted when None
    resolution  value for "-r"; the option is omitted when None

    Returns the result of rrdtool.fetch unchanged. The callers in this file
    read it as ((start, _, step), ds_names, rows).
  '''
  fetchStart = start
  if fetchStart is None:
    fetchStart = "now-10m"

  fetchArgs = [file, cf]
  fetchArgs += ["--daemon", "unix:{{ganglia_runtime_dir}}/rrdcached.limited.sock"]
  fetchArgs += ["-s", fetchStart]

  # optional flags keep the order "-e" before "-r"
  for flag, value in (("-e", end), ("-r", resolution)):
    if value is not None:
      fetchArgs += [flag, value]

  return rrdtool.fetch(fetchArgs)

'''
  Collects metrics across several matching filenames.
'''
def collectStatMetrics(clusterName, hostName, metricName, files, cf, start, end, resolution):
  '''
    Aggregates one statistic of a metric across every matching rrd file and
    writes the aggregated series to stdout.

    clusterName  list of path components of the cluster directory; its last
                 element is printed as the cluster name. The list is not
                 modified.
    hostName     host directory to read; "__SummaryInfo__" means every
                 directory below the cluster directory
    metricName   "<metric regex>.<stat>" or "<metric regex>._rate.<stat>",
                 where <stat> is the aggregate to print (the caller passes
                 _min, _max, _avg or _sum)
    files        candidate file names the metric regex is matched against
    cf, start, end, resolution
                 passed through to loadRRDData

    Output: the literal line "sum", cluster name, host name, concrete metric
    name, start timestamp, step size, one line per time step ("[~n]" when the
    last file read has no value for that step) and finally "[~EOM]".

    Returns None. Nothing is written when no file matches.
  '''
  # Work on a copy so the caller's list stays untouched.
  clusterPath = list(clusterName)
  if clusterPath[0] != '/':
    clusterPath.insert(0, '/')

  metricParts = metricName.split('.')

  # split() always returns at least one element
  metricStat = metricParts[-1]

  isRate = len(metricParts) > 1 and metricParts[-2] == '_rate'
  if isRate:
    metricName = '.'.join(metricParts[:-2])
  else:
    metricName = '.'.join(metricParts[:-1])

  pattern = re.compile(metricName + r'\.rrd$')
  matchedFiles = [name for name in files if pattern.match(name)]

  parentPath = os.path.join(*clusterPath)

  if hostName != "__SummaryInfo__":
    hostGlob = hostName
  else:
    hostGlob = '*'

  actualFiles = []
  for matchedFile in matchedFiles:
    for candidate in glob.glob(os.path.join(parentPath, hostGlob, matchedFile)):
      # paths containing __SummaryInfo__ are never aggregated
      if -1 == candidate.find("__SummaryInfo__"):
        actualFiles.append(candidate)

  if len(actualFiles) == 0:
    return

  '''
  vals holds one dict per time step:
    {
      "step_value": raw value of the file currently being read
      "count": number of files that contributed a value
      "_sum": running sum of the contributed values
      "_avg": _sum / count
      "_min": smallest contributed value, None until the first one
      "_max": largest contributed value, None until the first one
    }
  '''

  timestamp = None
  stepsize = None
  concreteMetricName = None
  vals = None # values across all files

  for rrdFile in actualFiles:
    rrdMetric = loadRRDData(rrdFile, cf, start, end, resolution)

    # the header fields are taken from the first file only
    if concreteMetricName is None:
      timestamp = rrdMetric[0][0]
      stepsize = rrdMetric[0][2]

      if isRate:
        suffix = '_rate.' + metricStat
      else:
        suffix = metricStat

      # Swap only the trailing "rrd" extension for the suffix; replacing
      # every "rrd" would also rewrite metric names that contain it.
      baseName = rrdFile.split(os.sep).pop()
      concreteMetricName = baseName[:-len('rrd')] + suffix

    metricValues = rrdMetric[2]

    if vals is None:
      vals = [None] * len(metricValues)

    for i, row in enumerate(metricValues):
      if vals[i] is None:
        vals[i] = {'count': 0, '_sum': 0.0, '_avg': 0.0, '_min': None, '_max': None}

      slot = vals[i]
      rawValue = row[0]
      slot['step_value'] = rawValue
      if rawValue is None:
        continue

      if isRate:
        # a rate needs the previous raw value of the same file
        if 0 == i or vals[i-1]['step_value'] is None:
          rawValue = 0.0
        else:
          rawValue = (rawValue - vals[i-1]['step_value']) / stepsize

      slot['count'] += 1
      slot['_sum'] += rawValue
      slot['_avg'] = slot['_sum'] / slot['count']

      # No fixed sentinels: negative values (e.g. decreasing rates) and very
      # large values must be able to become the min or max as well.
      if slot['_min'] is None or rawValue < slot['_min']:
        slot['_min'] = rawValue

      if slot['_max'] is None or rawValue > slot['_max']:
        slot['_max'] = rawValue

  sys.stdout.write("sum\n")
  sys.stdout.write(clusterPath[-1] + "\n")
  sys.stdout.write(hostName + "\n")
  sys.stdout.write(concreteMetricName + "\n")
  sys.stdout.write(str(timestamp) + "\n")
  sys.stdout.write(str(stepsize) + "\n")

  for val in vals:
    if val['step_value'] is None:
      sys.stdout.write("[~n]")
    else:
      sys.stdout.write(str(val[metricStat]))
    sys.stdout.write("\n")

  sys.stdout.write("[~EOM]\n")

  return

def printMetric(clusterName, hostName, metricName, file, cf, start, end,
                resolution, pointInTime):
  '''
    Loads one rrd file and writes its content to stdout.

    clusterName  printed as the cluster; replaced by "" when it ends with "rrds"
    hostName     printed as the host
    metricName   printed as the metric
    file         path of the rrd file to load
    cf, start, end, resolution
                 passed through to loadRRDData
    pointInTime  when true only the most recent non-empty value is written,
                 otherwise the whole series is written

    Output: ds_name, cluster, host, metric, start time and step, one per line,
    followed by the values and a closing "[~EOM]" line. In a series a missing
    value is written as "[~n]", and a value occurring N > 1 times in a row is
    written once followed by a "[~r]N" line.

    Returns None.
  '''
  if clusterName.endswith("rrds"):
    clusterName = ""

  rrdMetric = loadRRDData(file, cf, start, end, resolution)

  header = [
    rrdMetric[1][0],        # ds_name
    clusterName,
    hostName,
    metricName,
    str(rrdMetric[0][0]),   # time
    str(rrdMetric[0][2]),   # step
  ]
  for line in header:
    sys.stdout.write(line)
    sys.stdout.write("\n")

  if not pointInTime:
    valueCount = 0
    lastValue = None

    for row in rrdMetric[2]:
      thisValue = row[0]

      if valueCount > 0 and thisValue == lastValue:
        valueCount += 1
        continue

      # a new value starts: close the previous run first
      if valueCount > 1:
        sys.stdout.write("[~r]")
        sys.stdout.write(str(valueCount))
        sys.stdout.write("\n")

      if thisValue is None:
        sys.stdout.write("[~n]\n")
      else:
        sys.stdout.write(str(thisValue))
        sys.stdout.write("\n")

      valueCount = 1
      lastValue = thisValue

    # The run that is still open when the data ends has to be closed too,
    # otherwise the trailing repeated samples are silently dropped.
    if valueCount > 1:
      sys.stdout.write("[~r]")
      sys.stdout.write(str(valueCount))
      sys.stdout.write("\n")
  else:
    # walk backwards to the most recent sample that carries a value
    value = None
    for row in reversed(rrdMetric[2]):
      if row[0] is not None:
        value = row[0]
        break

    if value is not None:
      sys.stdout.write(str(value))
      sys.stdout.write("\n")

  sys.stdout.write("[~EOM]\n")
  return


def stripList(l):
  '''
    Returns a new list with strip() applied to every element of l.
  '''
  stripped = []
  for item in l:
    stripped.append(item.strip())
  return stripped


sys.stdout.write("Content-type: text/plain\n\n")

# write start time
sys.stdout.write(str(time.mktime(time.gmtime())))
sys.stdout.write("\n")

# Start from an empty query so that a request method other than GET or POST
# (HEAD, for instance) cannot leave queryString undefined further down.
queryString = {}
requestMethod = os.environ['REQUEST_METHOD']

if requestMethod == 'POST':
  # only the first line of the request body is parsed
  postData = sys.stdin.readline()
  queryString = cgi.parse_qs(postData)
  # parse_qs maps every key to a list of values; keep the first one
  queryString = dict((k, v[0]) for k, v in queryString.items())
elif requestMethod == 'GET':
  queryString = dict(cgi.parse_qsl(os.environ['QUERY_STRING']))

# m: comma separated metric names (also tried as regular expressions)
metricParts = stripList(queryString.get("m", "").split(","))

# h: comma separated host names; an empty list means "all hosts"
hostParts = []
if "h" in queryString:
  hostParts = queryString["h"].split(",")
hostParts = stripList(hostParts)

# c: comma separated cluster names, joined onto the rrd base path
clusterParts = stripList(queryString.get("c", "").split(","))

# p: rrd base path
# NOTE(review): the client chooses the directory that gets walked here;
# confirm this endpoint is reachable by trusted callers only.
rrdPath = queryString.get("p", "{{rrdcached_base_dir}}")

# s, e, r: start, end and resolution handed to rrdtool fetch (None = not given)
start = queryString.get("s")
end = queryString.get("e")
resolution = queryString.get("r")

# cf: second argument of rrdtool fetch
cf = queryString.get("cf", "AVERAGE")

# pt: its mere presence selects point-in-time output
pointInTime = "pt" in queryString


def _walk(*args, **kwargs):
  '''
    Generator around os.walk that also descends into symlinked directories.

    Every directory entry reported by os.walk that is a symbolic link is
    walked with a plain os.walk of its own (links nested inside it are not
    followed again); those results are yielded before the tuple of the
    directory that contains the link. Accepts the arguments of os.walk.
  '''
  for entry in os.walk(*args, **kwargs):
    parent, children = entry[0], entry[1]

    for child in children:
      candidate = os.path.join(parent, child)
      if not os.path.islink(candidate):
        continue
      for linkedEntry in os.walk(candidate, **kwargs):
        yield linkedEntry

    yield (parent, children, entry[2])


# Walk every requested cluster directory and print the requested metrics of
# every directory that holds files.
for cluster in clusterParts:
  for path, dirs, files in _walk(os.path.join(rrdPath,cluster)):
    pathParts = path.split("/")

    # Process only paths which contain files.
    if len(files) == 0:
      continue

    # No host parameter passed - process all host folders and summary info.
    # Host parameter passed - process only the folders of those hosts.
    if len(hostParts) > 0 and pathParts[-1] not in hostParts:
      continue

    for metric in metricParts:
      rrdFilePath = os.path.join(path, metric + ".rrd")

      if os.path.exists(rrdFilePath):
        # Exact name of metric
        printMetric(pathParts[-2], pathParts[-1], metric,
                    rrdFilePath, cf, start, end, resolution,
                    pointInTime)
        continue

      # A trailing statistic component asks for an aggregate over files.
      need_stats = metric.split(".")[-1] in ['_min', '_max', '_avg', '_sum']

      if need_stats and not pointInTime:
        collectStatMetrics(pathParts[:-1], pathParts[-1], metric, files, cf, start, end, resolution)
      else:
        # Regex as metric name
        p = re.compile(metric + r'\.rrd$')
        for matchedFile in filter(p.match, files):
          printMetric(pathParts[-2], pathParts[-1], matchedFile[:-4],
                      os.path.join(path, matchedFile), cf, start, end,
                      resolution, pointInTime)

sys.stdout.write("[~EOF]\n")
# write end time
sys.stdout.write(str(time.mktime(time.gmtime())))
sys.stdout.write("\n")

# flush must actually be called; the bare attribute reference did nothing
sys.stdout.flush()
